import requests
from lxml import etree
import time
import os
import re
 
def huoquyuanma(url='https://www.tujigu.com/'):
    """Download a page and parse it into an lxml element tree.

    Args:
        url: Address of the page to fetch. Defaults to the site's home page.

    Returns:
        The object returned by ``etree.HTML`` for the response text, or
        ``None`` when the download or the parse raised an exception (a
        message is printed in that case).
    """
    headers = {
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.8',
        'Cache-Control': 'max-age=0',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36',
        'Connection': 'keep-alive',
        'Referer': 'http://www.baidu.com/'
    }
    try:
        new = requests.get(url, headers=headers, timeout=16)  # fetch the page source
        time.sleep(2)  # pause after every request (presumably to go easy on the server)
        new.encoding = new.apparent_encoding  # decode with the detected charset to avoid mojibake
        return etree.HTML(new.text)  # parse the HTML text into an element tree
    except Exception as exc:
        # Best-effort fetch: report the failure and give up on this page.
        # ``except Exception`` (instead of a bare ``except:``) lets
        # KeyboardInterrupt and SystemExit propagate, so Ctrl-C still stops
        # the scraper instead of being reported as a failed page.
        print(f'获取{url}页面失败，已经放弃！', exc)
        return None
 

 
def huoqutujimingcheng(url):
    """Fetch a gallery page and extract its metadata.

    Args:
        url: Address of the gallery's first page.

    Returns:
        A 4-tuple ``(image_count, titles, page_links, page_total)``:

        * ``image_count`` -- from the first text node of the third ``<p>``
          in ``div.tuji``: the part before the first ``'P'``, split on
          spaces, second token (a string).
        * ``titles`` -- list of text nodes of the gallery's ``<h1>``.
        * ``page_links`` -- list of ``href`` values of the anchors directly
          under the ``#pages`` element.
        * ``page_total`` -- text of the second-to-last pagination anchor
          (a string); the caller uses it as the number of sub-pages.

    Raises:
        ValueError: If the page could not be fetched, or if it does not
            contain the nodes the extraction above relies on.
    """
    t = huoquyuanma(url)  # parsed page, or None when the fetch failed
    if t is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(f'failed to fetch gallery page: {url}')

    tujimingc = t.xpath('//div[@class="tuji"]/div[@class="weizhi"]/h1/text()')  # gallery title
    tup = t.xpath('//div[@class="tuji"]/p[3]/text()')  # text holding the image count
    link = t.xpath('//*[@id="pages"]/a/@href')  # sub-page links
    zishuliang = t.xpath('//*[@id="pages"]/text()/following-sibling::a/text()')  # pagination labels

    try:
        # Drop everything from the first 'P' onwards, then split on spaces;
        # the second token is returned as the image count.
        mp = tup[0].split('P')[0].split(' ')
        return mp[1], tujimingc, link, zishuliang[-2]
    except IndexError as exc:
        # One of the expected nodes/tokens is missing from this page.
        raise ValueError(f'unexpected gallery page layout: {url}') from exc
 
# Driver script: asks how many galleries to download, then walks consecutive
# gallery ids starting at 11986 and saves every image of every sub-page to
# D:/tujigu/<gallery title>/<n>.jpg.

zongji = 0  # running total of images written to disk across all galleries
count = input('请输入需要采集的图集数量:')

for i in range(int(count)):  # gallery ids 11986 .. 11986 + count - 1
    tuji = 'https://www.tujigu.com/a/' + str(i + 11986) + '/'  # gallery URL (also its first page)
    print(tuji)

    try:
        # (image count, title list, sub-page links, number of sub-pages)
        aaa = huoqutujimingcheng(tuji)
        print(aaa)
        # Strip whitespace from the title so it is safer to use as a folder
        # name. Raw string: '\s' in a plain literal is an invalid escape.
        name = re.sub(r'\s', '', aaa[1][0])
        ss = int(aaa[3])  # number of sub-pages in this gallery
    except (AttributeError, IndexError, ValueError) as exc:
        # The gallery page could not be fetched or lacks the expected nodes
        # (AttributeError covers a None page being dereferenced). Skip this
        # gallery instead of aborting the whole run.
        print(f'获取{tuji}图集信息失败，已经跳过！', exc)
        continue

    mingcheng = 1  # file-name counter for the images of this gallery
    path = 'D:/tujigu/' + str(name)  # target folder for this gallery
    os.makedirs(path, exist_ok=True)  # create the folder if it is missing

    for j in range(1, ss + 1):
        if j == 1:
            ht = tuji  # the first page cannot be reached with a suffix
            print(ht)
        else:
            ht = tuji + str(j) + '.html'  # pages from the second one on

        page = huoquyuanma(ht)
        if page is None:
            # The fetch failed and was already reported by huoquyuanma.
            continue

        piclist = page.xpath('//div[@class="content"]/img/@src')  # image URLs on this page
        for m in piclist:
            try:
                # A timeout keeps one stalled download from hanging the run;
                # 16 s matches the page-fetch timeout used by huoquyuanma.
                tupian = requests.get(m, timeout=16)
                # Do not save an HTTP error body as a .jpg file.
                tupian.raise_for_status()
            except requests.RequestException as exc:
                print(f'下载{m}失败，已经跳过！', exc)
            else:
                with open(path + '/' + str(mingcheng) + '.jpg', 'wb') as f:  # write the image
                    f.write(tupian.content)
                zongji += 1
            # Advance even after a failure so file names keep matching the
            # image's position in the gallery.
            mingcheng += 1
             
